import pkuseg
import gensim
import re
import openpyxl

# Build a vocabulary from one column of an Excel sheet: every distinct term
# found in the scanned cells gets a sequential integer id (first seen = 0).
excel_file = "D://药剂.xlsx"
wb = openpyxl.load_workbook(excel_file)
sh = wb['工作表1']

# Delimiters that separate terms inside a cell: 、 ， 。 ； + and space.
# Raw string: none of these characters needs escaping inside a character
# class, and the old backslash forms were invalid string escape sequences.
re_de = re.compile(r'[、，。；+ ]')
dict5 = dict()  # term -> id, in first-seen order
site = 0        # next id to hand out

# Rows 2..201 of column 21 are scanned.
for i in range(2, 202):
    # str() turns an empty cell (None) into 'None', which the range test
    # below rejects, so empty cells contribute nothing.
    s = str(sh.cell(i, 21).value)
    for block in re_de.split(s):
        # Lexicographic string comparison: in practice this keeps pieces
        # whose first character lies in U+4E00..U+9FFF and drops empty
        # pieces and pieces starting with ASCII text or digits.
        if u'\u4e00' <= block <= u'\u9fff' and block not in dict5:
            dict5[block] = site
            site += 1

# NOTE: the original called f.close() here, but no `f` exists in this script;
# that raised NameError before the result could be printed.
print(dict5)